Trail 1:Initailly when I tried to run Homoney integration after runing pca on indiviudal samples and then merged the samples it failed. Trail2: I will merge the samples after findvariables step and then perform scaling and pca and then run harmony. Trail3: Merge the sample datasets before performing any steps of QC to save time and computational power trail4: it does not make any sense to plot the viloin plot with two samples in one plot. so we are going back, removing the merged datstet perfrom the QC with individual samples and then merging and then going for the data normalization step (downstream analysis)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#library(Matrix)
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.4.1
## Loading required package: SeuratObject
## Warning: package 'SeuratObject' was built under R version 4.4.1
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.4.1
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.1
Load the data: Rename the file names to barcodes, features and matrix as Read10X does not recognise other file name.
sam1 <- Read10X("~/Documents/Drive G/Personal Project/sc_RNAseq tutorial/sample/sample_1")
sam2 <- Read10X("~/Documents/Drive G/Personal Project/sc_RNAseq tutorial/sample/sample_2")
Convert the files into seurat objects
sam1 <- CreateSeuratObject(counts = sam1, min.cells = 3, min.features = 200, project = "sam1")
sam2 <- CreateSeuratObject(counts = sam2, min.cells= 3, min.features = 200, project ="sam2")
mitochondrial counts
sam1[["percent.mt"]] <- PercentageFeatureSet(sam1, pattern = "^MT-")
sam2[["percent.mt"]] <- PercentageFeatureSet(sam2, pattern = "^MT-")
View
#View(sam1@meta.data)
#View(sam2@meta.data)
Violin plot
#VlnPlot(Merged_sam, features = c("nFeature_RNA","nCount_RNA","percent.mt"), ncol = 3)
VlnPlot(sam1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
VlnPlot(sam2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
perform QC
#Merged_sam <- subset(Merged_sam, subset = nFeature_RNA > 200 & nFeature_RNA < 6500 & percent.mt < 15 )
sam1 <- subset(sam1, subset = nFeature_RNA > 200 & nFeature_RNA < 6500 & percent.mt < 15)
sam2 <- subset(sam2, subset = nFeature_RNA > 200 & nFeature_RNA < 6500 & percent.mt < 15)
View violin plot after QC
#VlnPlot(Merged_sam, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 )
VlnPlot(sam1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
VlnPlot(sam2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
Merge samples
Merged_sam <- merge(sam1,sam2, add.cell.ids = c("sam1","sam2"), project = "Merged_sam")
#rm(Merged_sam)
Pre-processing
Normalization
Merged_sam <- NormalizeData(Merged_sam)
## Normalizing layer: counts.sam1
## Normalizing layer: counts.sam2
#sam1 <- NormalizeData(sam1)
#sam2 <- NormalizeData(sam2)
Finding variable genes
Merged_sam <- FindVariableFeatures(Merged_sam, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts.sam1
## Finding variable features for layer counts.sam2
#sam1 <- FindVariableFeatures(sam1, selection.method = "vst", nfeatures = 2000)
#sam2 <- FindVariableFeatures(sam2, selection.method = "vst", nfeatures = 2000)
Scale data
Merged_sam <- ScaleData(Merged_sam)
## Centering and scaling data matrix
#sam1 <- ScaleData(sam1)
#sam2 <- ScaleData(sam2)
Run PCA
Merged_sam <- RunPCA(Merged_sam)
## PC_ 1
## Positive: FTL, SRGN, S100A4, CD52, FCER1G, HLA-DPB1, S100A9, HLA-DPA1, CTSS, FCGR3A
## SCGB2A2, C1QB, C1QA, CCL5, C1orf162, NKG7, AIF1, ALOX5AP, LYZ, CD14
## HLA-DRA, C1QC, S100A8, GZMA, TFF3, APOE, CST7, GNLY, MS4A6A, VSIG4
## Negative: C1R, CALD1, C1S, ID4, RARRES2, CCDC80, UPK3B, COL1A2, KLK11, TM4SF1
## PROCR, PDPN, NNMT, ID1, COL1A1, CFI, IGFBP6, C3, PTGIS, EGFL6
## TPM2, SERPING1, RARRES1, COL6A2, PLA2G2A, CFH, HTRA1, PLAT, COL8A1, CALB2
## PC_ 2
## Positive: KRT19, PPDPF, XBP1, STARD10, SCGB2A2, TFF3, KRT18, COX6C, S100P, CD24
## BAMBI, SCGB1D2, METRN, KRT8, ADIRF, NQO1, FXYD3, AGR2, CCL5, RAB11FIP1
## SMIM22, ELF3, NKG7, MDK, SNCG, GZMA, GNLY, MLPH, CRABP2, CCND1
## Negative: SLC11A1, CYBB, SPI1, CD68, CD163, VSIG4, CD14, CFD, AIF1, MS4A6A
## LYZ, C3AR1, CPVL, MARCO, HLA-DRB1, MS4A4A, CTSB, MS4A7, C1QC, HLA-DRA
## C5AR1, CTSS, LST1, CD74, ENG, FN1, S100A8, FCER1G, CSF1R, MAFB
## PC_ 3
## Positive: NKG7, CCL5, GZMA, CST7, GNLY, FGFBP2, GZMB, MALAT1, CTSW, CD52
## KLRB1, CD7, GZMH, ZFP36L2, TRBC1, TRBC2, IL32, CLIC3, GYPC, CD3D
## TRAC, CCL4, CD3G, TRDC, RARRES3, S100A4, SPOCK2, DUSP2, C12orf75, CCL4L2
## Negative: KRT19, COX6C, STARD10, XBP1, TFF3, KRT18, SCGB2A2, S100P, CCND1, ADIRF
## BAMBI, CD24, KRT8, RAB11FIP1, HSPB1, AGR2, NANS, METRN, FXYD3, MDK
## SCGB1D2, ELF3, UQCRQ, PPFIA1, SMIM22, NQO1, SPINT2, SNCG, CYB5A, MLPH
## PC_ 4
## Positive: SCGB2A2, TFF3, XBP1, SCGB1D2, KRT19, S100P, FTL, CD24, ADIRF, STARD10
## FXYD3, KRT18, CCND1, AGR2, COX6C, CGA, KRT8, RAB11FIP1, APOD, SMIM22
## CEMIP, C1QB, APOE, CLU, CA12, IL13RA2, HP, TFPI2, WFDC2, CHI3L1
## Negative: TYMS, MKI67, NUSAP1, PCLAF, CDKN3, BIRC5, TOP2A, STMN1, TK1, CCNB2
## CENPF, TPX2, PTTG1, DLGAP5, CDCA3, UBE2C, UBE2T, HMGB2, CEP55, HMMR
## MAD2L1, RRM2, ZWINT, CCNA2, CENPA, GTSE1, CDK1, KIF23, CENPM, ASPM
## PC_ 5
## Positive: BDKRB1, ARSI, EMP1, SERPINE1, SBSN, MAP1B, TPPP3, ANGPTL2, DPYSL3, HAS1
## FSTL3, SERPINB2, AHNAK2, C11orf96, CAV1, HSPG2, LUM, NOV, PODXL, COL5A1
## CADM3, MEG3, SPNS2, LINC01133, FLNC, TIMP3, CAV2, S100A3, APOA1, GJB2
## Negative: IL13RA2, CEMIP, GABRB2, ADAMTS9, EPGN, FMO1, ADH1B, CILP, PCP4, CLIC5
## TCEAL6, STEAP1, TCEAL2, CSRP2, FLRT2, ALDH8A1, CHI3L1, STEAP2, MGARP, OSR1
## FST, MYLK, SLC40A1, CHRDL1, TRDC, CST6, PRRX1, ETV7, NR2F1, GRIA2
#sam1 <- RunPCA(sam1)
#sam2 <- RunPCA(sam2)
DimPlot(Merged_sam, reduction = "pca") #+ NoLegend()
DimPlot(Merged_sam, reduction = "pca", dim = c(1,5))
VizDimLoadings(Merged_sam, dims = 1:2, reduction = "pca")
Run Harmony Integration
#install.packages('harmony')
library(harmony)
## Warning: package 'harmony' was built under R version 4.4.1
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.4.1
## Run Harmony Integration
options(repr.plot.height = 2.5, repr.plot.width = 6)
Merged_sam <- RunHarmony(Merged_sam,"orig.ident", plot_convergence = TRUE)
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony converged after 5 iterations
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Run Clustering
Merged_sam <- RunUMAP(Merged_sam, reduction = 'harmony', dims = 1:30,
reduction.name = 'umap.harmony')
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:59:21 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:59:21 Read 12315 rows and found 30 numeric columns
## 11:59:21 Using Annoy for neighbor search, n_neighbors = 30
## 11:59:21 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:59:22 Writing NN index file to temp file /var/folders/ql/4mkyx9c51q3d4h02kxsxhtj80000gn/T//RtmpfjxHbq/file33984e632c9c
## 11:59:22 Searching Annoy index using 1 thread, search_k = 3000
## 11:59:24 Annoy recall = 100%
## 11:59:24 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 11:59:24 Initializing from normalized Laplacian + noise (using RSpectra)
## 11:59:25 Commencing optimization for 200 epochs, with 551504 positive edges
## 11:59:25 Using rng type: pcg
## 11:59:30 Optimization finished
Merged_sam <- FindNeighbors(Merged_sam, reduction = 'harmony', dims = 1:30)
## Computing nearest neighbor graph
## Warning: package 'future' was built under R version 4.4.1
## Computing SNN
Merged_sam <- FindClusters(Merged_sam, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12315
## Number of edges: 509960
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9189
## Number of communities: 14
## Elapsed time: 1 seconds
Merged_sam <- FindClusters(Merged_sam, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12315
## Number of edges: 509960
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9768
## Number of communities: 8
## Elapsed time: 1 seconds
DimPlot(Merged_sam, reduction ="umap.harmony", group.by = "orig.ident")
p2 <- DimPlot(
Merged_sam,
reduction = "umap.harmony",
group.by = c("orig.ident","RNA_snn_res.0.1"),
combine = FALSE, label.size = 2
)
p2
## [[1]]
##
## [[2]]
Joining layers
Merged_sam <- JoinLayers(Merged_sam)
Finding marker genes in the clusters.
Merged_sam.markers <- FindAllMarkers(Merged_sam, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
Filtering the marker gene list to keep the most upregulated genes
top_markers<- Merged_sam.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1)
write.csv(top_markers, file = "top_markers.csv", row.names = FALSE)
#top10 <- Merged_sam.markers %>%
# group_by(cluster) %>%
#top_n(n = 1, wt = avg_log2FC)
Merged_sam.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 10) %>%
ungroup() -> top10
Plotting the upregulated genes in each cluster
FeaturePlot(Merged_sam, features = c("S100P", "SLC11A1", "C1R", "KLRB1", "TRAC", "NUSAP1", "HLA-DQB2", "TCL1A"))
DoHeatmap(Merged_sam, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(Merged_sam, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## TSPAN33, AL138899.1, GZMM, CD2, CD3E, KLRD1
Tryig to run SingleR on the sample clusters for cell annotation
Getting normalized and raw counts
raw_counts <- LayerData(Merged_sam, assay = "RNA", layer = 'counts')
raw_counts[c("S100P", "SLC11A1", "C1R", "KLRB1", "TRAC"),1:2]
## 5 x 2 sparse Matrix of class "dgCMatrix"
## sam1_AAACCCAAGACGGTTG-1 sam1_AAACCCAAGGTCACAG-1
## S100P 4 .
## SLC11A1 . .
## C1R 34 .
## KLRB1 . .
## TRAC . .
norm_counts <- LayerData(Merged_sam, assay = "RNA", layer = 'data')
norm_counts[c("S100P", "SLC11A1", "C1R", "KLRB1", "TRAC"),1:2]
## 5 x 2 sparse Matrix of class "dgCMatrix"
## sam1_AAACCCAAGACGGTTG-1 sam1_AAACCCAAGGTCACAG-1
## S100P 1.041993 .
## SLC11A1 . .
## C1R 2.809182 .
## KLRB1 . .
## TRAC . .
Install Tidyverse and celldex
#install.packages("tidyverse")
#BiocManager::install("celldex")
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.1
## Warning: package 'purrr' was built under R version 4.4.1
## Warning: package 'lubridate' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.2 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("ggplot2")
library(ggplot2)
library(celldex)
## Loading required package: SummarizedExperiment
## Warning: package 'SummarizedExperiment' was built under R version 4.4.1
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.4.2
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.4.1
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.4.1
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.4.1
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following object is masked from 'package:SeuratObject':
##
## intersect
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.4.1
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.4.2
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## The following object is masked from 'package:sp':
##
## %over%
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.4.2
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.4.1
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
##
## Attaching package: 'SummarizedExperiment'
##
## The following object is masked from 'package:Seurat':
##
## Assays
##
## The following object is masked from 'package:SeuratObject':
##
## Assays
Get reference dataset from celldex
ref <- celldex::HumanPrimaryCellAtlasData()
unique(ref$label.main)
## [1] "DC" "Smooth_muscle_cells" "Epithelial_cells"
## [4] "B_cell" "Neutrophils" "T_cells"
## [7] "Monocyte" "Erythroblast" "BM & Prog."
## [10] "Endothelial_cells" "Gametocytes" "Neurons"
## [13] "Keratinocytes" "HSC_-G-CSF" "Macrophage"
## [16] "NK_cell" "Embryonic_stem_cells" "Tissue_stem_cells"
## [19] "Chondrocytes" "Osteoblasts" "BM"
## [22] "Platelets" "Fibroblasts" "iPS_cells"
## [25] "Hepatocytes" "MSC" "Neuroepithelial_cell"
## [28] "Astrocyte" "HSC_CD34+" "CMP"
## [31] "GMP" "MEP" "Myelocyte"
## [34] "Pre-B_cell_CD34-" "Pro-B_cell_CD34+" "Pro-Myelocyte"
unique(ref$label.fine)
## [1] "DC:monocyte-derived:immature"
## [2] "DC:monocyte-derived:Galectin-1"
## [3] "DC:monocyte-derived:LPS"
## [4] "DC:monocyte-derived"
## [5] "Smooth_muscle_cells:bronchial:vit_D"
## [6] "Smooth_muscle_cells:bronchial"
## [7] "Epithelial_cells:bronchial"
## [8] "B_cell"
## [9] "Neutrophil"
## [10] "T_cell:CD8+_Central_memory"
## [11] "T_cell:CD8+"
## [12] "T_cell:CD4+"
## [13] "T_cell:CD8+_effector_memory_RA"
## [14] "T_cell:CD8+_effector_memory"
## [15] "T_cell:CD8+_naive"
## [16] "Monocyte"
## [17] "Erythroblast"
## [18] "BM"
## [19] "DC:monocyte-derived:rosiglitazone"
## [20] "DC:monocyte-derived:AM580"
## [21] "DC:monocyte-derived:rosiglitazone/AGN193109"
## [22] "DC:monocyte-derived:anti-DC-SIGN_2h"
## [23] "Endothelial_cells:HUVEC"
## [24] "Endothelial_cells:HUVEC:Borrelia_burgdorferi"
## [25] "Endothelial_cells:HUVEC:IFNg"
## [26] "Endothelial_cells:lymphatic"
## [27] "Endothelial_cells:HUVEC:Serum_Amyloid_A"
## [28] "Endothelial_cells:lymphatic:TNFa_48h"
## [29] "T_cell:effector"
## [30] "T_cell:CCR10+CLA+1,25(OH)2_vit_D3/IL-12"
## [31] "T_cell:CCR10-CLA+1,25(OH)2_vit_D3/IL-12"
## [32] "Gametocytes:spermatocyte"
## [33] "DC:monocyte-derived:A._fumigatus_germ_tubes_6h"
## [34] "Neurons:ES_cell-derived_neural_precursor"
## [35] "Keratinocytes"
## [36] "Keratinocytes:IL19"
## [37] "Keratinocytes:IL20"
## [38] "Keratinocytes:IL22"
## [39] "Keratinocytes:IL24"
## [40] "Keratinocytes:IL26"
## [41] "Keratinocytes:KGF"
## [42] "Keratinocytes:IFNg"
## [43] "Keratinocytes:IL1b"
## [44] "HSC_-G-CSF"
## [45] "DC:monocyte-derived:mature"
## [46] "Monocyte:anti-FcgRIIB"
## [47] "Macrophage:monocyte-derived:IL-4/cntrl"
## [48] "Macrophage:monocyte-derived:IL-4/Dex/cntrl"
## [49] "Macrophage:monocyte-derived:IL-4/Dex/TGFb"
## [50] "Macrophage:monocyte-derived:IL-4/TGFb"
## [51] "Monocyte:leukotriene_D4"
## [52] "NK_cell"
## [53] "NK_cell:IL2"
## [54] "Embryonic_stem_cells"
## [55] "Tissue_stem_cells:iliac_MSC"
## [56] "Chondrocytes:MSC-derived"
## [57] "Osteoblasts"
## [58] "Tissue_stem_cells:BM_MSC"
## [59] "Osteoblasts:BMP2"
## [60] "Tissue_stem_cells:BM_MSC:BMP2"
## [61] "Tissue_stem_cells:BM_MSC:TGFb3"
## [62] "DC:monocyte-derived:Poly(IC)"
## [63] "DC:monocyte-derived:CD40L"
## [64] "DC:monocyte-derived:Schuler_treatment"
## [65] "DC:monocyte-derived:antiCD40/VAF347"
## [66] "Tissue_stem_cells:dental_pulp"
## [67] "T_cell:CD4+_central_memory"
## [68] "T_cell:CD4+_effector_memory"
## [69] "T_cell:CD4+_Naive"
## [70] "Smooth_muscle_cells:vascular"
## [71] "Smooth_muscle_cells:vascular:IL-17"
## [72] "Platelets"
## [73] "Epithelial_cells:bladder"
## [74] "Macrophage:monocyte-derived"
## [75] "Macrophage:monocyte-derived:M-CSF"
## [76] "Macrophage:monocyte-derived:M-CSF/IFNg"
## [77] "Macrophage:monocyte-derived:M-CSF/Pam3Cys"
## [78] "Macrophage:monocyte-derived:M-CSF/IFNg/Pam3Cys"
## [79] "Macrophage:monocyte-derived:IFNa"
## [80] "Gametocytes:oocyte"
## [81] "Monocyte:F._tularensis_novicida"
## [82] "Endothelial_cells:HUVEC:B._anthracis_LT"
## [83] "B_cell:Germinal_center"
## [84] "B_cell:Plasma_cell"
## [85] "B_cell:Naive"
## [86] "B_cell:Memory"
## [87] "DC:monocyte-derived:AEC-conditioned"
## [88] "Tissue_stem_cells:lipoma-derived_MSC"
## [89] "Tissue_stem_cells:adipose-derived_MSC_AM3"
## [90] "Endothelial_cells:HUVEC:FPV-infected"
## [91] "Endothelial_cells:HUVEC:PR8-infected"
## [92] "Endothelial_cells:HUVEC:H5N1-infected"
## [93] "Macrophage:monocyte-derived:S._aureus"
## [94] "Fibroblasts:foreskin"
## [95] "iPS_cells:skin_fibroblast-derived"
## [96] "iPS_cells:skin_fibroblast"
## [97] "T_cell:gamma-delta"
## [98] "Monocyte:CD14+"
## [99] "Macrophage:Alveolar"
## [100] "Macrophage:Alveolar:B._anthacis_spores"
## [101] "Neutrophil:inflam"
## [102] "iPS_cells:PDB_fibroblasts"
## [103] "iPS_cells:PDB_1lox-17Puro-5"
## [104] "iPS_cells:PDB_1lox-17Puro-10"
## [105] "iPS_cells:PDB_1lox-21Puro-20"
## [106] "iPS_cells:PDB_1lox-21Puro-26"
## [107] "iPS_cells:PDB_2lox-5"
## [108] "iPS_cells:PDB_2lox-22"
## [109] "iPS_cells:PDB_2lox-21"
## [110] "iPS_cells:PDB_2lox-17"
## [111] "iPS_cells:CRL2097_foreskin"
## [112] "iPS_cells:CRL2097_foreskin-derived:d20_hepatic_diff"
## [113] "iPS_cells:CRL2097_foreskin-derived:undiff."
## [114] "B_cell:CXCR4+_centroblast"
## [115] "B_cell:CXCR4-_centrocyte"
## [116] "Endothelial_cells:HUVEC:VEGF"
## [117] "iPS_cells:fibroblasts"
## [118] "iPS_cells:fibroblast-derived:Direct_del._reprog"
## [119] "iPS_cells:fibroblast-derived:Retroviral_transf"
## [120] "Endothelial_cells:lymphatic:KSHV"
## [121] "Endothelial_cells:blood_vessel"
## [122] "Monocyte:CD16-"
## [123] "Monocyte:CD16+"
## [124] "Tissue_stem_cells:BM_MSC:osteogenic"
## [125] "Hepatocytes"
## [126] "Neutrophil:uropathogenic_E._coli_UTI89"
## [127] "Neutrophil:commensal_E._coli_MG1655"
## [128] "MSC"
## [129] "Neuroepithelial_cell:ESC-derived"
## [130] "Astrocyte:Embryonic_stem_cell-derived"
## [131] "Endothelial_cells:HUVEC:IL-1b"
## [132] "HSC_CD34+"
## [133] "CMP"
## [134] "GMP"
## [135] "B_cell:immature"
## [136] "MEP"
## [137] "Myelocyte"
## [138] "Pre-B_cell_CD34-"
## [139] "Pro-B_cell_CD34+"
## [140] "Pro-Myelocyte"
## [141] "Smooth_muscle_cells:umbilical_vein"
## [142] "iPS_cells:foreskin_fibrobasts"
## [143] "iPS_cells:iPS:minicircle-derived"
## [144] "iPS_cells:adipose_stem_cells"
## [145] "iPS_cells:adipose_stem_cell-derived:lentiviral"
## [146] "iPS_cells:adipose_stem_cell-derived:minicircle-derived"
## [147] "Fibroblasts:breast"
## [148] "Monocyte:MCSF"
## [149] "Monocyte:CXCL4"
## [150] "Neurons:adrenal_medulla_cell_line"
## [151] "Tissue_stem_cells:CD326-CD56+"
## [152] "NK_cell:CD56hiCD62L+"
## [153] "T_cell:Treg:Naive"
## [154] "Neutrophil:LPS"
## [155] "Neutrophil:GM-CSF_IFNg"
## [156] "Monocyte:S._typhimurium_flagellin"
## [157] "Neurons:Schwann_cell"
install SingleR
#BiocManager::install("SingleR")
library(SingleR)
## Warning: package 'SingleR' was built under R version 4.4.1
##
## Attaching package: 'SingleR'
## The following objects are masked from 'package:celldex':
##
## BlueprintEncodeData, DatabaseImmuneCellExpressionData,
## HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
## MouseRNAseqData, NovershternHematopoieticData
#BiocManager::install("scran")
library(scran)
## Warning: package 'scran' was built under R version 4.4.1
## Loading required package: SingleCellExperiment
## Warning: package 'SingleCellExperiment' was built under R version 4.4.1
## Loading required package: scuttle
## Warning: package 'scuttle' was built under R version 4.4.1
Run singleR on normalized data
ct_ann <- SingleR(test = norm_counts, # we could also use sce or raw_counts
ref = ref,
labels = ref$label.main,
de.method = 'wilcox')
## Warning in FUN(...): no within-block comparison between Neuroepithelial_cell
## and BM & Prog.
#library("magrittr")
ct_ann %>% head()
## DataFrame with 6 rows and 4 columns
## scores labels
## <matrix> <character>
## sam1_AAACCCAAGACGGTTG-1 0.403145:0.345071:0.349268:... Chondrocytes
## sam1_AAACCCAAGGTCACAG-1 0.166320:0.188311:0.200635:... Macrophage
## sam1_AAACCCAAGTGCGTCC-1 0.173666:0.196706:0.213181:... Pre-B_cell_CD34-
## sam1_AAACCCACACGCTATA-1 0.127044:0.168634:0.181017:... DC
## sam1_AAACCCAGTTTCCCAC-1 0.161358:0.172904:0.179275:... Macrophage
## sam1_AAACCCATCTTGATTC-1 0.201712:0.266665:0.320877:... Macrophage
## delta.next pruned.labels
## <numeric> <character>
## sam1_AAACCCAAGACGGTTG-1 0.053144843 Chondrocytes
## sam1_AAACCCAAGGTCACAG-1 0.004849993 Macrophage
## sam1_AAACCCAAGTGCGTCC-1 0.000941892 Pre-B_cell_CD34-
## sam1_AAACCCACACGCTATA-1 0.002488869 DC
## sam1_AAACCCAGTTTCCCAC-1 0.001034198 Macrophage
## sam1_AAACCCATCTTGATTC-1 0.204236805 Macrophage
unique(ct_ann$pruned.labels)
## [1] "Chondrocytes" "Macrophage" "Pre-B_cell_CD34-"
## [4] "DC" "Fibroblasts" "Pro-B_cell_CD34+"
## [7] "Epithelial_cells" "GMP" "MSC"
## [10] "T_cells" "B_cell" "NK_cell"
## [13] "Hepatocytes" "HSC_-G-CSF" "Monocyte"
## [16] "Smooth_muscle_cells" "HSC_CD34+" "Endothelial_cells"
## [19] NA "Pro-Myelocyte" "Neutrophils"
## [22] "Platelets" "Osteoblasts" "Keratinocytes"
## [25] "Tissue_stem_cells" "Embryonic_stem_cells" "Neurons"
## [28] "CMP" "Myelocyte" "iPS_cells"
## [31] "Neuroepithelial_cell" "BM" "Astrocyte"
## [34] "BM & Prog." "Erythroblast" "MEP"
## [37] "Gametocytes"
table(ct_ann$pruned.labels)
##
## Astrocyte B_cell BM
## 5 70 148
## BM & Prog. Chondrocytes CMP
## 13 129 152
## DC Embryonic_stem_cells Endothelial_cells
## 441 21 78
## Epithelial_cells Erythroblast Fibroblasts
## 1648 4 700
## Gametocytes GMP Hepatocytes
## 1 182 135
## HSC_-G-CSF HSC_CD34+ iPS_cells
## 182 53 9
## Keratinocytes Macrophage MEP
## 56 2964 8
## Monocyte MSC Myelocyte
## 312 219 26
## Neuroepithelial_cell Neurons Neutrophils
## 7 33 16
## NK_cell Osteoblasts Platelets
## 1367 15 6
## Pre-B_cell_CD34- Pro-B_cell_CD34+ Pro-Myelocyte
## 515 97 108
## Smooth_muscle_cells T_cells Tissue_stem_cells
## 935 1436 33
print("ct_ann$labels")
## [1] "ct_ann$labels"
table(ct_ann$labels)
##
## Astrocyte B_cell BM
## 6 70 148
## BM & Prog. Chondrocytes CMP
## 13 139 152
## DC Embryonic_stem_cells Endothelial_cells
## 441 21 79
## Epithelial_cells Erythroblast Fibroblasts
## 1648 4 758
## Gametocytes GMP Hepatocytes
## 1 182 135
## HSC_-G-CSF HSC_CD34+ iPS_cells
## 182 53 9
## Keratinocytes Macrophage MEP
## 56 3004 9
## Monocyte MSC Myelocyte
## 312 226 26
## Neuroepithelial_cell Neurons Neutrophils
## 7 33 16
## NK_cell Osteoblasts Platelets
## 1367 19 7
## Pre-B_cell_CD34- Pro-B_cell_CD34+ Pro-Myelocyte
## 515 97 110
## Smooth_muscle_cells T_cells Tissue_stem_cells
## 998 1439 33
summary(is.na(ct_ann$pruned.labels))
## Mode FALSE TRUE
## logical 12124 191
plotScoreHeatmap(ct_ann)
plotDeltaDistribution(ct_ann, ncol = 4, dots.on.top = FALSE)
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Warning in max(data$density, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Warning: Computation failed in `stat_ydensity()`.
## Caused by error in `$<-.data.frame`:
## ! replacement has 1 row, data has 0
rownames(ct_ann)[1:2] # make sure you have cell IDs
## [1] "sam1_AAACCCAAGACGGTTG-1" "sam1_AAACCCAAGGTCACAG-1"
Merged_sam<- AddMetaData(Merged_sam, ct_ann$pruned.labels, col.name = 'SingleR_HCA')
Merged_sam <- SetIdent(Merged_sam, value = "SingleR_HCA")
DimPlot(Merged_sam, label = T , repel = T, label.size = 3) + NoLegend()
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps